library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(readODS)
library(httr)
library(here)
here() starts at /Users/tomdavie/Documents/GitHub/ev_climate_change_project
library(data.table)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
data.table 1.14.0 using 1 threads (see ?getDTthreads).  Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********
library(sf)
Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(leaflet)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
library(leaflet.extras)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.5     ✓ purrr   0.3.4
✓ tibble  3.1.3     ✓ dplyr   1.0.7
✓ tidyr   1.1.3     ✓ stringr 1.4.0
✓ readr   2.0.1     ✓ forcats 0.5.1
── Conflicts ───────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::between()   masks data.table::between()
x dplyr::filter()    masks stats::filter()
x dplyr::first()     masks data.table::first()
x dplyr::lag()       masks stats::lag()
x dplyr::last()      masks data.table::last()
x purrr::transpose() masks data.table::transpose()
library(grDevices)
# loading in shape file 
uk_shape_file <- st_read(here("raw_data/LA_shape/Local_Authority_Districts__April_2019__UK_BFE_v2.shp")) %>%
  clean_names() %>% 
  st_simplify(dTolerance = 1000) %>%
  st_transform("+proj=longlat +datum=WGS84") %>% 
  dplyr::select(lad19cd, long, lat, geometry) 
Reading layer `Local_Authority_Districts__April_2019__UK_BFE_v2' from data source 
  `/Users/tomdavie/Documents/GitHub/ev_climate_change_project/raw_data/LA_shape/Local_Authority_Districts__April_2019__UK_BFE_v2.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 382 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -116.1928 ymin: 5333.603 xmax: 655989 ymax: 1220302
Projected CRS: OSGB 1936 / British National Grid
# Load in clean Electric Vehicles by Local Authority data
uk_ev <- read_csv(here("clean_data/ev_by_la_clean.csv"))
Rows: 475 Columns: 40
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (40): ons_la_code_apr_2019, region_local_authority_apr_2019_3, x2021_q1, x2020_q4, x2020_q3, x2020_q2, x2020_q1, x2...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Joining uk_ev + shape file 
uk_ev_map <- uk_ev %>% 
  left_join(uk_shape_file, by = c("ons_la_code_apr_2019" = "lad19cd")) %>% 
  drop_na() %>% 
  mutate(across(c(x2021_q1:x2011_q4), as.numeric)) %>% 
  st_as_sf()
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
Warning: Problem with `mutate()` input `..1`.
ℹ `..1 = across(c(x2021_q1:x2011_q4), as.numeric)`.
ℹ NAs introduced by coercion
# Set colours and bins
pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))

# Set labels
uk_ev_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Electric Vehicles",
  uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
  lapply(htmltools::HTML)
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")
    pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))
    
    uk_ev_map_labels <- sprintf(
      "<strong>%s</strong><br/>%g Electric Vehicles",
      uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
      lapply(htmltools::HTML)
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")
# Wrangling to create an EV count over time plot 
uk_ev_longer <- uk_ev %>%
  # Pivot longer to get year and count columns
  pivot_longer(cols = c(x2021_q1:x2011_q4), names_to = c("year"), values_to = "no_of_ev") %>% 
  # Filter so we only have UK as a whole data AND we only want final numbers of the year so Q4 
  filter(region_local_authority_apr_2019_3 == "United Kingdom" & str_detect(year, "q4")) %>% 
  # Simplify to just show year
  mutate(year = case_when(str_detect(year, "2021") ~ "2021",
         str_detect(year, "2020") ~ "2020",
         str_detect(year, "2019") ~ "2019",
         str_detect(year, "2018") ~ "2018",
         str_detect(year, "2017") ~ "2017",
         str_detect(year, "2016") ~ "2016",
         str_detect(year, "2015") ~ "2015",
         str_detect(year, "2014") ~ "2014",
         str_detect(year, "2013") ~ "2013",
         str_detect(year, "2012") ~ "2012",
         str_detect(year, "2011") ~ "2011"),
         year = as.numeric(year),
         no_of_ev = as.numeric(no_of_ev))
uk_ev_longer %>% 
  ggplot() +
  aes(x = year, y = no_of_ev) +
  geom_col(fill = "#08a44c") +
  scale_x_continuous(breaks = c(2011:2020)) +
  scale_y_continuous(breaks = seq(0, 220000, by = 20000), limits = c(0, 220000)) +
  labs(title = "\nNumber of Electric Vehicles over time in the UK\n",
       x = "\nYear\n",
       y = "\nNumber of Electric Vehicles\n") +
  theme_minimal() 

Row binding grid NO2 data

no2_2010 <- read_csv(here("raw_data/no2_by_grid_2010.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2011 <- read_csv(here("raw_data/no2_by_grid_2011.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2012 <- read_csv(here("raw_data/no2_by_grid_2012.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2013 <- read_csv(here("raw_data/no2_by_grid_2013.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2015 <- read_csv(here("raw_data/no2_by_grid_2015.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2016 <- read_csv(here("raw_data/no2_by_grid_2016.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2017 <- read_csv(here("raw_data/no2_by_grid_2017.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2018 <- read_csv(here("raw_data/no2_by_grid_2018.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Create empty data frame
no2_all <- data_frame()
Warning: `data_frame()` was deprecated in tibble 1.1.0.
Please use `tibble()` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# For each year, bind rows to one dataset
for (i in 2010:2019) {
  df_name <- paste0("no2_", i)
  df_input <- as.name(df_name)
  
  df <- eval(df_input) %>% 
    mutate(year = i)
  
no2_all <- bind_rows(no2_all, df)
  }
# Remove missing NO2 grid values
no2_clean <- no2_all %>% 
  filter(no2 != "MISSING")
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_clean_row <- no2_clean %>% 
  rowid_to_column()

# Source data
xy <- no2_clean_row %>% 
  select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
Warning in project(xy, proj4string, inverse = TRUE) :
  more than two dimensions found, using first two
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
final <-  merge(no2_clean_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  filter(year == 2019) %>% 
  select(lat, lon, no2) 
final <- final %>% 
  mutate(no2 = as.numeric(no2)) 
bins <- 10
pal <- colorBin("Spectral", domain = final$no2, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2,
             minOpacity = 0.1,
             max = 45,
             radius = 1,
             blur = 1) %>% 
  addLegend(pal = pal, values = final$no2,
                title="Average NO2 Conc in 2019")
no2_annual_mean <- read_ods(here("raw_data/NO2_tables.ods"), sheet = 3, skip = 2) %>% 
  clean_names()
no2_annual_mean_all <- no2_annual_mean %>% 
  filter(site == "All sites") %>% 
  mutate(year = as.numeric(year))
no2_annual_mean_all %>% 
  ggplot() +
  aes(x = year, y = annual_mean_no2concentration_mg_m3) + 
  geom_line() +
  theme_minimal() +
  scale_y_continuous(breaks = seq(0, 65, 5), limits = c(22, 62)) +
  scale_x_continuous(breaks = seq(1997, 2020, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(x = "\nYear\n", 
       y = "\nAnnual Mean NO2 Conc mg m3\n",
       title = "\nNO2 over time in the UK\n") +
png("no2_time_uk.png", units="in", width=8, height=5, res=300)
# insert ggplot code
dev.off()
null device 
          1 
library(tidyverse)
library(fable)
Loading required package: fabletools
library(tsibble)

Attaching package: ‘tsibble’

The following object is masked from ‘package:data.table’:

    key

The following objects are masked from ‘package:base’:

    intersect, setdiff, union
library(tsibbledata)
library(ggfortify)
forecast_no2 <- no2_annual_mean_all %>% 
  dplyr::select(year, annual_mean_no2concentration_mg_m3) %>% 
  tsibble(index = year)
autoplot(forecast_no2) 
Plot variable not specified, automatically selected `.vars = annual_mean_no2concentration_mg_m3`

fit <- forecast_no2 %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
fit
forecast_1 <- fit %>%
  fabletools::forecast(h = "15 years")
forecast_1
forecast_1 %>%
  autoplot(forecast_no2, level = NULL) +
  ggtitle("Forecasts for NO2 Conc over time") +
  xlab("Year") +
  guides(colour = guide_legend(title = "Forecast")) +
    theme_minimal() +
  scale_y_continuous(breaks = seq(0, 65, 5), limits = c(0, 62)) +
  scale_x_continuous(breaks = seq(1997, 2034, 3)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(x = "\nYear\n", 
       y = "\nAnnual Mean NO2 Conc mg m3\n",
       title = "\nNO2 over time in the UK\n") +
  scale_y_continuous(limit = c(0, 65)) +
  png("no2_time_forecast.png", units="in", width=8, height=5, res=300)
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Warning: Removed 1 row(s) containing missing values (geom_path).
# insert ggplot code
dev.off()
null device 
          1 
# Now set our training data from 1992 to 2006
train <- forecast_no2 %>%
  filter(year <= 2017)

# run the model on the training set 
fit_train <- train %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
# forecast from the training set
forecast_test <- fit_train %>% 
  fabletools::forecast(h = "4 years")

# Plot forecasts against actual values
forecast_test %>%
  autoplot(train, level = NULL) +
    autolayer(filter(forecast_no2, year >= 2017), color = "black") +
    ggtitle("Forecasts for NO2 Conc over time") +
    xlab("Year") + ylab("Megalitres") +
    guides(colour=guide_legend(title="Forecast"))
Plot variable not specified, automatically selected `.vars = annual_mean_no2concentration_mg_m3`

# Binding 2014 and 2019
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2014")
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2019")
Rows: 281802 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_difference <- bind_rows(no2_2014, no2_2019)
# Diff in NO2 between 2014 & 2019
no2_difference <- no2_difference %>% 
  filter(no2 != "MISSING") %>% 
  mutate(no2 = as.numeric(no2),
         year = as.numeric(year)) %>% 
  group_by(x, y) %>% 
  pivot_wider(names_from = "year", values_from = "no2") %>% 
  rename("x2014" = "2014",
         "x2019" = "2019") %>% 
  mutate(no2_diff_2014_2019 = x2014 - x2019)  %>% 
#Remove this separation when put in cleaning script
  ungroup() %>% 
#Remove this separation when put in cleaning script
  drop_na() %>% 
#Remove this separation when put in cleaning script
# Changing all negative numbers to zero as these aren't of interest to us
  mutate(no2_diff_2014_2019 = if_else(no2_diff_2014_2019 < 0, 0, no2_diff_2014_2019))
# Converting NO2 diff from x y to lat long
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_diff_row <- no2_difference %>% 
  rowid_to_column()

# Source data
xy <- no2_diff_row %>% 
  dplyr::select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
Warning in project(xy, proj4string, inverse = TRUE) :
  more than two dimensions found, using first two
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
no2_diff_final <-  merge(no2_diff_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  dplyr::select(lat, lon, no2_diff_2014_2019) 
# Geospatial of No2 diff 2014 to 2019
bins <- c(0, 5, 10, 15, 20, 25, 30)
pal <- colorBin("Spectral", domain = no2_diff_final$no2_diff_2014_2019, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = no2_diff_final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2_diff_2014_2019,
             minOpacity = 0.1,
             max = 30,
             radius = 1,
             blur = 2) %>% 
  addLegend(pal = pal, values = no2_diff_final$no2_diff_2014_2019,
                title="Annual Mean NO2 Change between 2014 and 2019")
no2_difference_log <- no2_difference %>% 
  mutate(no2_diff_2014_2019_log = log(no2_diff_2014_2019))
no2_difference_log %>% 
  ggplot() +
  geom_histogram(aes(x = no2_diff_2014_2019_log))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 35278 rows containing non-finite values (stat_bin).

ev_charging <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_02-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  dplyr::select(year, month, total_devices, rapid_devices) %>% 
  filter(rapid_devices != "NA") %>% 
  mutate(total_devices = as.numeric(total_devices),
         other_devices = total_devices - rapid_devices) %>% 
  select(-total_devices) %>% 
  pivot_longer(cols = c(other_devices, rapid_devices), names_to = "charger", values_to = "number_of_chargers") %>% 
  filter(month == "October" | month == "July" & year == "2021") %>% 
  mutate(charger = factor(charger, levels = c("rapid_devices", "other_devices")))
New names:
* `` -> ...5
* `` -> ...6
* `` -> ...7
* `` -> ...8
* `` -> ...9
Rows: 38 Columns: 9
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Year, Month, ...5
lgl (4): ...6, ...7, ...8, ...9

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ev_charging %>% 
  ggplot() +
  aes(x = year, y = number_of_chargers, fill = charger) +
  geom_col() +
  scale_y_continuous(breaks = seq(0, 30000, 2500)) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 270, vjust = 0.5, hjust=1)) +
  scale_fill_manual(labels = c("Rapid Charger", "Standard Charger"), values = c("#e41a1c", "#4daf4a")) +
  labs(title = "\nNumber of Electric Vehicle Chargers Growth in the UK\n",
       x = "\nYear\n",
       y = "\nNumber of Chargers\n",
       fill = "Charger Type") +
  png("no_chargers_time_uk.png", units="in", width=8, height=5, res=300)
ev_charging_rapid <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01b-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_rapid" = x6)
New names:
* `` -> ...4
* `` -> ...6
* `` -> ...8
* `` -> ...10
* `` -> ...12
* ...
Rows: 448 Columns: 32
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (18): LA / Region Code, Local Authority / Region Name, Jul-21, ...4, Apr-21, ...6, Jan-21, ...8, Oct-20, ...10, Jul...
lgl (14): ...19, ...20, ...21, ...22, ...23, ...24, ...25, ...26, ...27, ...28, ...29, ...30, ...31, ...32

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ev_charging_total <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01a-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_total" = x6)
New names:
* `` -> ...4
* `` -> ...6
* `` -> ...8
* `` -> ...10
* `` -> ...12
* ...
Rows: 448 Columns: 30
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (18): LA / Region Code, Local Authority / Region Name, Jul-21, ...4, Apr-21, ...6, Jan-21, ...8, Oct-20, ...10, Jul...
lgl (12): ...19, ...20, ...21, ...22, ...23, ...24, ...25, ...26, ...27, ...28, ...29, ...30

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ev_charging_geo <- inner_join(ev_charging_rapid, ev_charging_total, by = "la_region_code") %>% 
  drop_na() %>% 
  select(la_region_code, local_authority_region_name.x, count_rapid, count_total) %>% 
  filter(str_detect(local_authority_region_name.x, "[a-z][a-z]"), 
         count_rapid != "-") %>% 
  mutate(local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(](Met County)[)]"),
         local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(]abolished \\w+ \\d+[)]"))
# Joining ev_charging_geo + shape file 
ev_charging_map <- ev_charging_geo %>% 
  inner_join(uk_shape_file, by = c("la_region_code" = "lad19cd")) %>% 
  mutate(across(c(count_rapid:count_total), as.numeric)) %>% 
  st_as_sf()
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_total, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers per 100k Population",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_total, ev_charging_map$count_rapid) %>% 
  lapply(htmltools::HTML)
# Geospatial of EV Chargers Per 100k people in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_total),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_total, opacity = 0.7, title = NULL,
            position = "bottomright")
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_rapid, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_rapid, ev_charging_map$count_total) %>% 
  lapply(htmltools::HTML)
# Geospatial of EV Rapid Chargers per 100k people in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_rapid),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_rapid, opacity = 0.7, title = NULL,
            position = "bottomright")
uk_ev_map_2021 <- uk_ev_map %>% 
  select(ons_la_code_apr_2019, region_local_authority_apr_2019_3, x2021_q1, geometry)

# filter for desired polygon
highlands <- uk_ev_map_2021 %>% 
  filter(region_local_authority_apr_2019_3 == "Highland") 

# st_join seems less dirty
ev_no2 <- final %>% 
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(highlands)) %>% 
  st_join(uk_ev_map_2021, join = st_intersects, left = FALSE) %>% 
  group_by(region_local_authority_apr_2019_3) %>% 
  mutate(mean_no2 = sum(no2))
ev_no2 %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

population <- read_csv(here("raw_data/ukpopestimatesmid2020on2021geography/MYE2 - Persons-Table 1.csv"), skip = 7) %>% 
  clean_names() %>% 
  select(code, name, all_ages) %>% 
  mutate(pop_per_100k = all_ages/100000)
New names:
* `` -> ...96
* `` -> ...97
* `` -> ...98
* `` -> ...99
* `` -> ...100
Rows: 420 Columns: 100
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Code, Name, Geography
lgl (5): ...96, ...97, ...98, ...99, ...100

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ev_no2_tbl <- tibble(ev_no2) %>% 
  select(-c(no2, geometry)) %>% 
  unique()
ev_pop_100k <- ev_no2_tbl %>% 
  left_join(population, by = c("ons_la_code_apr_2019" = "code")) %>% 
  mutate(ev_per_100k = x2021_q1/pop_per_100k) %>% 
  drop_na()
ev_pop_100k_log <- ev_pop_100k %>% 
  mutate(ev_per_100k_log = log(ev_per_100k),
         mean_no2_log = log(mean_no2))
ev_pop_100k_log %>% 
  ggplot() +
  aes(x = ev_per_100k_log, y = mean_no2_log) +
  geom_point() +
  geom_smooth(se = FALSE, method = lm, colour = "#08a44c") +
  theme_minimal() +
  labs(title = "\nElectric Vehicles vs Mean NO2 Conc for each UK Local Authority in 2019\n",
       x = "\nElectric Vehicle per 100k population (log)\n",
       y = "\nMean NO2 Conc (log)\n") +
  png("ev_no2_log.png", units="in", width=8, height=5, res=300)
`geom_smooth()` using formula 'y ~ x'
library(cluster)
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
ev_scale <- ev_pop_100k_log %>% 
  select(region_local_authority_apr_2019_3, x2021_q1, mean_no2) %>% 
  mutate_if(is.numeric, scale)
ev_pop_100k_scale_log <- ev_pop_100k_log %>% 
  select(region_local_authority_apr_2019_3, ev_per_100k, mean_no2) %>% 
  mutate_if(is.numeric, scale)
ev_scale_log %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
Error in ggplot(.) : object 'ev_scale_log' not found
ev_pop_100k_scale_log %>% 
  ggplot() +
  aes(x = ev_per_100k, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
`geom_smooth()` using formula 'y ~ x'

library(corrplot)
corrplot 0.90 loaded
ev_pop_100k_scale_numeric <- ev_pop_100k_scale %>% 
  select(-region_local_authority_apr_2019_3)
corrplot(cor(ev_pop_100k_scale_numeric), method = "number", type = "lower")

car_by_fuel_gb <- read_ods(here("raw_data/car_by_fuel_by_year_ALL_UK.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(58) %>% 
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("year", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count"))
car_by_fuel_gb %>% 
  ggplot() +
  aes(x = year, y = count, colour = fuel_type) +
  geom_smooth(method = lm) +
  geom_point() 
`geom_smooth()` using formula 'y ~ x'

new_car_by_fuel_gb <- read_ods(here("raw_data/new_car_by_fuel.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(21) %>% 
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("date", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count")) %>% 
  mutate(date = as.numeric(date))
new_car_by_fuel_gb %>% 
  ggplot() +
  aes(x = date, y = count, colour = fuel_type) +
  geom_line(aes(group = fuel_type), size = 1.25) +
  theme_minimal() +
  scale_y_continuous(breaks = seq(0, 2250, 250)) +
  scale_x_continuous(limits = c(2001, 2021), breaks = seq(2001, 2020, 1)) + 
  labs(title = "\nNew Car Registrations by Fuel Type\n",
       x = "\nYear\n",
       y = "\nNumber of Cars ('000s)\n",
       colour = "Fuel Type") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
  panel.grid.minor = element_blank()) +
  scale_colour_manual(labels = c("Alternative Fuels", "Diesel", "Petrol", "Zero Emission"), values = c("#abdda4", "#d7191c", "#fdae61", "#2b83ba")) +
  theme(legend.position = "none")  +
  annotate("text", x = 2006, y = 1750, label = "Petrol", colour = "#fdae61", fontface = 2) +
  annotate("text", x = 2005, y = 1100, label = "Diesel", colour = "#d7191c", fontface = 2) +
  annotate("text", x = 2016, y = 300, label = "Alternative Fuels", colour = "#abdda4", fontface = 2) +
  annotate("text", x = 2008, y = 150, label = "Zero Emissions", colour = "#2b83ba", fontface = 2) +
annotate("text", x = 2020.60, y = 950, label = "987", colour = "#fdae61", fontface = 2) +
  annotate("text", x = 2020.60, y = 250, label = "295", colour = "#d7191c", fontface = 2) +
  annotate("text", x = 2020.60, y = 450, label = "338", colour = "#abdda4", fontface = 2) +
  annotate("text", x = 2020.60, y = 50, label = "106", colour = "#2b83ba", fontface = 2) + 
png("test.png", units="in", width=7, height=5, res=300)
# insert ggplot code
dev.off()
null device 
          1 
uk_ev_top <- uk_ev_top %>% 
  left_join(uk_shape_file, by = c("ons_la_code_apr_2019" = "lad19cd")) %>% 
  drop_na() %>% 
  st_as_sf()
# Converting NO2 diff from x y to lat long
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_all_clean <- no2_clean %>% 
  rowid_to_column()

# Source data
xy <- no2_all_clean %>% 
  dplyr::select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
Warning in project(xy, proj4string, inverse = TRUE) :
  more than two dimensions found, using first two
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
no2_all_final <-  merge(no2_all_clean, latlon, by.x = "rowid", by.y = "rowid") %>%
  dplyr::select(lat, lon, no2, year) 
uk_ev_top_2021 <- uk_ev_top %>% 
  select(ons_la_code_apr_2019, region_local_authority_apr_2019_3, year, no_of_ev)

# filter for desired polygon
#highlands <- uk_ev_stockport %>% 
 # filter(region_local_authority_apr_2019_3 == "Highland") 

# st_join seems less dirty
ev_no2_all_years <- no2_all_final %>% 
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(uk_ev_top_2021)) %>%
  st_join(uk_ev_top_2021, join = st_intersects, left = FALSE) 
ev_no2_all_year_top <- tibble(ev_no2_all_years) %>% 
  filter(year.x == year.y) %>% 
  mutate(no2 = as.numeric(no2)) %>% 
  group_by(region_local_authority_apr_2019_3, year.x) %>% 
  mutate(mean_no2 = mean(no2))
ev_no2_all_year_top %>% 
  ggplot() +
  geom_line(aes(x = year.x, y = no_of_ev, colour = region_local_authority_apr_2019_3)) +
  scale_x_continuous(breaks = c(2011:2019)) +
  scale_y_continuous(breaks = seq(0, 2750, 250)) +
  labs(title = "High EV Local Authority over Years",
       x = "Year",
       y = "Number of Electric Vehicles",
       colour = "Local Authority") +
  theme_minimal()

ev_no2_all_year_top %>% 
  ggplot() +
  geom_line(aes(x = year.x, y = mean_no2, colour = region_local_authority_apr_2019_3)) +
  scale_x_continuous(breaks = c(2011:2019)) +
  scale_y_continuous(breaks = c(10:28)) +
  labs(title = "Mean NO2 per Year by Local Authority",
       x = "Year",
       y = "Mean NO2",
       colour = "Local Authority") +
  theme_minimal()

ev_charging_no2_log <- ev_charging_no2 %>% 
  mutate(count_rapid_log = log(count_rapid),
         count_total_log = log(count_total),
         mean_no2_log = log(mean_no2))
ev_charging_no2 %>% 
  ggplot() +
  aes(x = count_rapid, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm)
`geom_smooth()` using formula 'y ~ x'

ev_charging_no2 %>% 
  ggplot() +
  aes(x = count_total, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm)
`geom_smooth()` using formula 'y ~ x'

ev_charging_no2_log %>% 
  ggplot() +
  aes(x = count_rapid_log, y = mean_no2_log) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 3772 rows containing non-finite values (stat_smooth).

ev_charging_no2_log %>% 
  ggplot() +
  aes(x = count_total_log, y = mean_no2_log) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm, colour = "#08a44c") +
  theme_minimal() +
  labs(title = "\nEV Chargers vs Mean NO2 Conc for each UK Local Authority in 2019\n",
       x = "\nTotal Number of EV Chargers (log)\n",
       y = "\nMean NO2 Conc (log)\n") +
  png("ev_charger_no2_log.png", units="in", width=8, height=5, res=300)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 18 rows containing non-finite values (stat_smooth).
uk_ev_income %>% 
  ggplot() +
  aes(x = x2021_q1_log, y = mean_total_annual_income) +
  geom_point() +
  geom_smooth(method = lm, colour = "#08a44c", se = FALSE) +
  theme_minimal() +
  labs(title = "\nEVs vs Mean Annual Income for Local Authorities in England and Wales\n",
       x = "\nNumber of Electric Vehicles (log)\n",
       y = "\nMean Total Annual Income\n") +
  png("ev_income.png", units="in", width=8, height=5, res=300)
`geom_smooth()` using formula 'y ~ x'
---
title: "R Notebook"
output: html_notebook
---

```{r}
library(janitor)
library(readODS)
library(httr)
library(here)
library(data.table)
library(sf)
library(leaflet)
library(leaflet.extras)
library(tidyverse)
library(grDevices)
```

```{r}
# loading in shape file 
uk_shape_file <- st_read(here("raw_data/LA_shape/Local_Authority_Districts__April_2019__UK_BFE_v2.shp")) %>%
  clean_names() %>% 
  st_simplify(dTolerance = 1000) %>%
  st_transform("+proj=longlat +datum=WGS84") %>% 
  dplyr::select(lad19cd, long, lat, geometry) 
```

```{r}
# Load in clean Electric Vehicles by Local Authority data
uk_ev <- read_csv(here("clean_data/ev_by_la_clean.csv"))
# Joining uk_ev + shape file 
uk_ev_map <- uk_ev %>% 
  left_join(uk_shape_file, by = c("ons_la_code_apr_2019" = "lad19cd")) %>% 
  drop_na() %>% 
  mutate(across(c(x2021_q1:x2011_q4), as.numeric)) %>% 
  st_as_sf()
```

```{r}
# Set colours and bins
pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))

# Set labels
uk_ev_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Electric Vehicles",
  uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
  lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")
```

```{r}
    pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))
    
    uk_ev_map_labels <- sprintf(
      "<strong>%s</strong><br/>%g Electric Vehicles",
      uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
      lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")
```
  
```{r}
# Wrangling to create an EV count over time plot 
uk_ev_longer <- uk_ev %>%
  # Pivot longer to get year and count columns
  pivot_longer(cols = c(x2021_q1:x2011_q4), names_to = c("year"), values_to = "no_of_ev") %>% 
  # Filter so we only have UK as a whole data AND we only want final numbers of the year so Q4 
  filter(region_local_authority_apr_2019_3 == "United Kingdom" & str_detect(year, "q4")) %>% 
  # Simplify to just show year
  mutate(year = case_when(str_detect(year, "2021") ~ "2021",
         str_detect(year, "2020") ~ "2020",
         str_detect(year, "2019") ~ "2019",
         str_detect(year, "2018") ~ "2018",
         str_detect(year, "2017") ~ "2017",
         str_detect(year, "2016") ~ "2016",
         str_detect(year, "2015") ~ "2015",
         str_detect(year, "2014") ~ "2014",
         str_detect(year, "2013") ~ "2013",
         str_detect(year, "2012") ~ "2012",
         str_detect(year, "2011") ~ "2011"),
         year = as.numeric(year),
         no_of_ev = as.numeric(no_of_ev))
```


```{r}
uk_ev_longer %>% 
  ggplot() +
  aes(x = year, y = no_of_ev) +
  geom_col(fill = "#08a44c") +
  scale_x_continuous(breaks = c(2011:2020)) +
  scale_y_continuous(breaks = seq(0, 220000, by = 20000), limits = c(0, 220000)) +
  labs(title = "\nNumber of Electric Vehicles over time in the UK\n",
       x = "\nYear\n",
       y = "\nNumber of Electric Vehicles\n") +
  theme_minimal() 
```

  


# Row binding grid NO2 data 

```{r}
no2_2010 <- read_csv(here("raw_data/no2_by_grid_2010.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2011 <- read_csv(here("raw_data/no2_by_grid_2011.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2012 <- read_csv(here("raw_data/no2_by_grid_2012.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2013 <- read_csv(here("raw_data/no2_by_grid_2013.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2015 <- read_csv(here("raw_data/no2_by_grid_2015.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2016 <- read_csv(here("raw_data/no2_by_grid_2016.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2017 <- read_csv(here("raw_data/no2_by_grid_2017.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2018 <- read_csv(here("raw_data/no2_by_grid_2018.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))

# Create empty data frame
no2_all <- data_frame()

# For each year, bind rows to one dataset
for (i in 2010:2019) {
  df_name <- paste0("no2_", i)
  df_input <- as.name(df_name)
  
  df <- eval(df_input) %>% 
    mutate(year = i)
  
no2_all <- bind_rows(no2_all, df)
  }
```

```{r}
# Remove missing NO2 grid values
no2_clean <- no2_all %>% 
  filter(no2 != "MISSING")
```

```{r}
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_clean_row <- no2_clean %>% 
  rowid_to_column()

# Source data
xy <- no2_clean_row %>% 
  select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
final <-  merge(no2_clean_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  filter(year == 2019) %>% 
  select(lat, lon, no2) 
```

```{r}
final <- final %>% 
  mutate(no2 = as.numeric(no2)) 
```

```{r}
bins <- 10
pal <- colorBin("Spectral", domain = final$no2, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2,
             minOpacity = 0.1,
             max = 45,
             radius = 1,
             blur = 1) %>% 
  addLegend(pal = pal, values = final$no2,
                title="Average NO2 Conc in 2019")
```

```{r}
no2_annual_mean <- read_ods(here("raw_data/NO2_tables.ods"), sheet = 3, skip = 2) %>% 
  clean_names()
```

```{r}
no2_annual_mean_all <- no2_annual_mean %>% 
  filter(site == "All sites") %>% 
  mutate(year = as.numeric(year))
```

```{r}
no2_annual_mean_all %>% 
  ggplot() +
  aes(x = year, y = annual_mean_no2concentration_mg_m3) + 
  geom_line() +
  theme_minimal() +
  scale_y_continuous(breaks = seq(0, 65, 5), limits = c(22, 62)) +
  scale_x_continuous(breaks = seq(1997, 2020, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(x = "\nYear\n", 
       y = "\nAnnual Mean NO2 Conc mg m3\n",
       title = "\nNO2 over time in the UK\n") +
png("no2_time_uk.png", units="in", width=8, height=5, res=300)
# insert ggplot code
dev.off()
```


```{r}
library(tidyverse)
library(fable)
library(tsibble)
library(tsibbledata)
library(ggfortify)
```

```{r}
forecast_no2 <- no2_annual_mean_all %>% 
  dplyr::select(year, annual_mean_no2concentration_mg_m3) %>% 
  tsibble(index = year)
```

```{r}
autoplot(forecast_no2) 
```

```{r}
fit <- forecast_no2 %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
fit
```

```{r}
forecast_1 <- fit %>%
  fabletools::forecast(h = "15 years")
forecast_1
```

```{r}
forecast_1 %>%
  autoplot(forecast_no2, level = NULL) +
  ggtitle("Forecasts for NO2 Conc over time") +
  xlab("Year") +
  guides(colour = guide_legend(title = "Forecast")) +
    theme_minimal() +
  scale_y_continuous(breaks = seq(0, 65, 5), limits = c(0, 62)) +
  scale_x_continuous(breaks = seq(1997, 2034, 3)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(x = "\nYear\n", 
       y = "\nAnnual Mean NO2 Conc mg m3\n",
       title = "\nNO2 over time in the UK\n") +
  scale_y_continuous(limit = c(0, 65)) +
  png("no2_time_forecast.png", units="in", width=8, height=5, res=300)
# insert ggplot code
dev.off()
```

```{r}
# Now set our training data from 1992 to 2006
train <- forecast_no2 %>%
  filter(year <= 2017)

# run the model on the training set 
fit_train <- train %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
```

```{r}
# forecast from the training set
forecast_test <- fit_train %>% 
  fabletools::forecast(h = "4 years")

# Plot forecasts against actual values
forecast_test %>%
  autoplot(train, level = NULL) +
    autolayer(filter(forecast_no2, year >= 2017), color = "black") +
    ggtitle("Forecasts for NO2 Conc over time") +
    xlab("Year") + ylab("Megalitres") +
    guides(colour=guide_legend(title="Forecast"))
```

```{r}
# Binding 2014 and 2019
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2014")
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2019")

no2_difference <- bind_rows(no2_2014, no2_2019)
```

```{r}
# Diff in NO2 between 2014 & 2019
no2_difference <- no2_difference %>% 
  filter(no2 != "MISSING") %>% 
  mutate(no2 = as.numeric(no2),
         year = as.numeric(year)) %>% 
  group_by(x, y) %>% 
  pivot_wider(names_from = "year", values_from = "no2") %>% 
  rename("x2014" = "2014",
         "x2019" = "2019") %>% 
  mutate(no2_diff_2014_2019 = x2014 - x2019)  %>% 
#Remove this separation when put in cleaning script
  ungroup() %>% 
#Remove this separation when put in cleaning script
  drop_na() %>% 
#Remove this separation when put in cleaning script
# Changing all negative numbers to zero as these aren't of interest to us
  mutate(no2_diff_2014_2019 = if_else(no2_diff_2014_2019 < 0, 0, no2_diff_2014_2019))
```

```{r}
# Converting NO2 diff from x y to lat long
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_diff_row <- no2_difference %>% 
  rowid_to_column()

# Source data
xy <- no2_diff_row %>% 
  dplyr::select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
no2_diff_final <-  merge(no2_diff_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  dplyr::select(lat, lon, no2_diff_2014_2019) 
```

```{r}
# Geospatial of No2 diff 2014 to 2019
bins <- c(0, 5, 10, 15, 20, 25, 30)
pal <- colorBin("Spectral", domain = no2_diff_final$no2_diff_2014_2019, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = no2_diff_final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2_diff_2014_2019,
             minOpacity = 0.1,
             max = 30,
             radius = 1,
             blur = 2) %>% 
  addLegend(pal = pal, values = no2_diff_final$no2_diff_2014_2019,
                title="Annual Mean NO2 Change between 2014 and 2019")
```

```{r}
no2_difference_log <- no2_difference %>% 
  mutate(no2_diff_2014_2019_log = log(no2_diff_2014_2019))
```

```{r}
no2_difference_log %>% 
  ggplot() +
  geom_histogram(aes(x = no2_diff_2014_2019_log))
```

```{r}
ev_charging <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_02-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  dplyr::select(year, month, total_devices, rapid_devices) %>% 
  filter(rapid_devices != "NA") %>% 
  mutate(total_devices = as.numeric(total_devices),
         other_devices = total_devices - rapid_devices) %>% 
  select(-total_devices) %>% 
  pivot_longer(cols = c(other_devices, rapid_devices), names_to = "charger", values_to = "number_of_chargers") %>% 
  filter(month == "October" | month == "July" & year == "2021") %>% 
  mutate(charger = factor(charger, levels = c("rapid_devices", "other_devices")))
```

```{r}
ev_charging %>% 
  ggplot() +
  aes(x = year, y = number_of_chargers, fill = charger) +
  geom_col() +
  scale_y_continuous(breaks = seq(0, 30000, 2500)) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 270, vjust = 0.5, hjust=1)) +
  scale_fill_manual(labels = c("Rapid Charger", "Standard Charger"), values = c("#e41a1c", "#4daf4a")) +
  labs(title = "\nNumber of Electric Vehicle Chargers Growth in the UK\n",
       x = "\nYear\n",
       y = "\nNumber of Chargers\n",
       fill = "Charger Type") +
  png("no_chargers_time_uk.png", units="in", width=8, height=5, res=300)

```

```{r}
ev_charging_rapid <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01b-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_rapid" = x6)
ev_charging_total <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01a-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_total" = x6)

ev_charging_geo <- inner_join(ev_charging_rapid, ev_charging_total, by = "la_region_code") %>% 
  drop_na() %>% 
  select(la_region_code, local_authority_region_name.x, count_rapid, count_total) %>% 
  filter(str_detect(local_authority_region_name.x, "[a-z][a-z]"), 
         count_rapid != "-") %>% 
  mutate(local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(](Met County)[)]"),
         local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(]abolished \\w+ \\d+[)]"))
```

```{r}
# Joining ev_charging_geo + shape file 
ev_charging_map <- ev_charging_geo %>% 
  inner_join(uk_shape_file, by = c("la_region_code" = "lad19cd")) %>% 
  mutate(across(c(count_rapid:count_total), as.numeric)) %>% 
  st_as_sf()
```

```{r}
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_total, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers per 100k Population",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_total, ev_charging_map$count_rapid) %>% 
  lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Chargers Per 100k people in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_total),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_total, opacity = 0.7, title = NULL,
            position = "bottomright")
```


```{r}
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_rapid, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_rapid, ev_charging_map$count_total) %>% 
  lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Rapid Chargers per 100k people in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_rapid),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_rapid, opacity = 0.7, title = NULL,
            position = "bottomright")
```

```{r}
uk_ev_map_2021 <- uk_ev_map %>% 
  select(ons_la_code_apr_2019, region_local_authority_apr_2019_3, x2021_q1, geometry)

# filter for desired polygon
highlands <- uk_ev_map_2021 %>% 
  filter(region_local_authority_apr_2019_3 == "Highland") 

# st_join seems less dirty
ev_no2 <- final %>% 
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(highlands)) %>% 
  st_join(uk_ev_map_2021, join = st_intersects, left = FALSE) %>% 
  group_by(region_local_authority_apr_2019_3) %>% 
  mutate(mean_no2 = sum(no2))
```

```{r}
ev_no2 %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE)
```

```{r}
population <- read_csv(here("raw_data/ukpopestimatesmid2020on2021geography/MYE2 - Persons-Table 1.csv"), skip = 7) %>% 
  clean_names() %>% 
  select(code, name, all_ages) %>% 
  mutate(pop_per_100k = all_ages/100000)
```

```{r}
ev_no2_tbl <- tibble(ev_no2) %>% 
  select(-c(no2, geometry)) %>% 
  unique()
```

```{r}
ev_pop_100k <- ev_no2_tbl %>% 
  left_join(population, by = c("ons_la_code_apr_2019" = "code")) %>% 
  mutate(ev_per_100k = x2021_q1/pop_per_100k) %>% 
  drop_na()
```

```{r}
ev_pop_100k_log <- ev_pop_100k %>% 
  mutate(ev_per_100k_log = log(ev_per_100k),
         mean_no2_log = log(mean_no2))
```

```{r}
ev_pop_100k_log %>% 
  ggplot() +
  aes(x = ev_per_100k_log, y = mean_no2_log) +
  geom_point() +
  geom_smooth(se = FALSE, method = lm, colour = "#08a44c") +
  theme_minimal() +
  labs(title = "\nElectric Vehicles vs Mean NO2 Conc for each UK Local Authority in 2019\n",
       x = "\nElectric Vehicle per 100k population (log)\n",
       y = "\nMean NO2 Conc (log)\n") +
  png("ev_no2_log.png", units="in", width=8, height=5, res=300)
```

```{r}
library(cluster)
library(factoextra)
```

```{r}
ev_scale <- ev_pop_100k_log %>% 
  select(region_local_authority_apr_2019_3, x2021_q1, mean_no2) %>% 
  mutate_if(is.numeric, scale)
```

```{r}
ev_pop_100k_scale_log <- ev_pop_100k_log %>% 
  select(region_local_authority_apr_2019_3, ev_per_100k, mean_no2) %>% 
  mutate_if(is.numeric, scale)
```

```{r}
ev_scale_log %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
```

```{r}
ev_pop_100k_scale_log %>% 
  ggplot() +
  aes(x = ev_per_100k, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
```

```{r}
library(corrplot)
```
```{r}
ev_pop_100k_scale %>% 
  as_tibble() %>%
  pivot_longer(-region_local_authority_apr_2019_3,
               names_to = "type", 
               values_to = "value") %>% 
  drop_na() %>% 
  group_by(type) %>%
  summarise(mean = round(mean(value)), 
            sd = sd(value))
```
```{r}
ev_pop_100k_scale_numeric <- ev_pop_100k_scale %>% 
  select(-region_local_authority_apr_2019_3)
```


```{r}
corrplot(cor(ev_pop_100k_scale_numeric), method = "number", type = "lower")
```

```{r}
car_by_fuel_gb <- read_ods(here("raw_data/car_by_fuel_by_year_ALL_UK.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(58) %>% 
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("year", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count"))
```

```{r}
car_by_fuel_gb %>% 
  ggplot() +
  aes(x = year, y = count, colour = fuel_type) +
  geom_smooth(method = lm) +
  geom_point() 
```

```{r}
new_car_by_fuel_gb <- read_ods(here("raw_data/new_car_by_fuel.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(21) %>%
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("date", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count")) %>% 
  mutate(date = as.numeric(date))
```

```{r}
new_car_by_fuel_gb %>% 
  ggplot() +
  aes(x = date, y = count, colour = fuel_type) +
  geom_line(aes(group = fuel_type), size = 1.25) +
  theme_minimal() +
  scale_y_continuous(breaks = seq(0, 2250, 250)) +
  scale_x_continuous(limits = c(2001, 2021), breaks = seq(2001, 2020, 1)) + 
  labs(title = "\nNew Car Registrations by Fuel Type\n",
       x = "\nYear\n",
       y = "\nNumber of Cars ('000s)\n",
       colour = "Fuel Type") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
  panel.grid.minor = element_blank()) +
  scale_colour_manual(labels = c("Alternative Fuels", "Diesel", "Petrol", "Zero Emission"), values = c("#abdda4", "#d7191c", "#fdae61", "#2b83ba")) +
  theme(legend.position = "none")  +
  annotate("text", x = 2006, y = 1750, label = "Petrol", colour = "#fdae61", fontface = 2) +
  annotate("text", x = 2005, y = 1100, label = "Diesel", colour = "#d7191c", fontface = 2) +
  annotate("text", x = 2016, y = 300, label = "Alternative Fuels", colour = "#abdda4", fontface = 2) +
  annotate("text", x = 2008, y = 150, label = "Zero Emissions", colour = "#2b83ba", fontface = 2) +
annotate("text", x = 2020.60, y = 950, label = "987", colour = "#fdae61", fontface = 2) +
  annotate("text", x = 2020.60, y = 250, label = "295", colour = "#d7191c", fontface = 2) +
  annotate("text", x = 2020.60, y = 450, label = "338", colour = "#abdda4", fontface = 2) +
  annotate("text", x = 2020.60, y = 50, label = "106", colour = "#2b83ba", fontface = 2) + 
png("test.png", units="in", width=8, height=5, res=300)
# insert ggplot code
dev.off()
```

```{r}
uk_ev_top <- uk_ev %>% 
  mutate(across(c(x2021_q1:x2011_q4), as.numeric)) %>% 
  filter(region_local_authority_apr_2019_3 == "Milton Keynes" |
         region_local_authority_apr_2019_3 == "Swindon" |
         region_local_authority_apr_2019_3 == "Slough" | 
         region_local_authority_apr_2019_3 == "Leeds") %>% 
  pivot_longer(cols = c(x2021_q1:x2011_q4), names_to = c("year"), values_to = "no_of_ev") %>% 
  filter(str_detect(year, "q4")) %>% 
  mutate(year = str_sub(year, 2, -4))
```

```{r}
uk_ev_top <- uk_ev_top %>% 
  left_join(uk_shape_file, by = c("ons_la_code_apr_2019" = "lad19cd")) %>% 
  drop_na() %>% 
  st_as_sf()
```

```{r}
# Converting NO2 diff from x y to lat long
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_all_clean <- no2_clean %>% 
  rowid_to_column()

# Source data
xy <- no2_all_clean %>% 
  dplyr::select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
no2_all_final <-  merge(no2_all_clean, latlon, by.x = "rowid", by.y = "rowid") %>%
  dplyr::select(lat, lon, no2, year) 
```

```{r}
uk_ev_top_2021 <- uk_ev_top %>% 
  select(ons_la_code_apr_2019, region_local_authority_apr_2019_3, year, no_of_ev)

# filter for desired polygon
#highlands <- uk_ev_stockport %>% 
 # filter(region_local_authority_apr_2019_3 == "Highland") 

# st_join seems less dirty
ev_no2_all_years <- no2_all_final %>% 
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(uk_ev_top_2021)) %>%
  st_join(uk_ev_top_2021, join = st_intersects, left = FALSE) 
```

```{r}
ev_no2_all_year_top <- tibble(ev_no2_all_years) %>% 
  filter(year.x == year.y) %>% 
  mutate(no2 = as.numeric(no2)) %>% 
  group_by(region_local_authority_apr_2019_3, year.x) %>% 
  mutate(mean_no2 = mean(no2))
```

```{r}
ev_no2_all_year_top %>% 
  ggplot() +
  geom_line(aes(x = year.x, y = no_of_ev, colour = region_local_authority_apr_2019_3)) +
  scale_x_continuous(breaks = c(2011:2019)) +
  scale_y_continuous(breaks = seq(0, 2750, 250)) +
  labs(title = "High EV Local Authority over Years",
       x = "Year",
       y = "Number of Electric Vehicles",
       colour = "Local Authority") +
  theme_minimal()
```


```{r}
ev_no2_all_year_top %>% 
  ggplot() +
  geom_line(aes(x = year.x, y = mean_no2, colour = region_local_authority_apr_2019_3)) +
  scale_x_continuous(breaks = c(2011:2019)) +
  scale_y_continuous(breaks = c(10:28)) +
  labs(title = "Mean NO2 per Year by Local Authority",
       x = "Year",
       y = "Mean NO2",
       colour = "Local Authority") +
  theme_minimal()
```

```{r}
ev_charging_map_2021 <- ev_charging_map %>% 
  select(la_region_code, local_authority_region_name.x, count_rapid, count_total, geometry)

# st_join seems less dirty
ev_charging_no2 <- final %>% 
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(ev_charging_map_2021)) %>% 
  st_join(ev_charging_map_2021, join = st_intersects, left = FALSE) %>% 
  group_by(local_authority_region_name.x) %>% 
  mutate(mean_no2 = mean(no2))
```
  
```{r}
ev_charging_no2_log <- ev_charging_no2 %>% 
  mutate(count_rapid_log = log(count_rapid),
         count_total_log = log(count_total),
         mean_no2_log = log(mean_no2))
```
  
```{r}
ev_charging_no2 %>% 
  ggplot() +
  aes(x = count_rapid, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm)
```

```{r}
ev_charging_no2 %>% 
  ggplot() +
  aes(x = count_total, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm)
```

```{r}
ev_charging_no2_log %>% 
  ggplot() +
  aes(x = count_rapid_log, y = mean_no2_log) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm)
```

```{r}
ev_charging_no2_log %>% 
  ggplot() +
  aes(x = count_total_log, y = mean_no2_log) +
  geom_point() +
  geom_smooth(se = TRUE, method = lm, colour = "#08a44c") +
  theme_minimal() +
  labs(title = "\nEV Chargers vs Mean NO2 Conc for each UK Local Authority in 2019\n",
       x = "\nTotal Number of EV Chargers (log)\n",
       y = "\nMean NO2 Conc (log)\n") +
  png("ev_charger_no2_log.png", units="in", width=8, height=5, res=300)
```

```{r}
income_england <- read_csv(here("raw_data/incomeestimatesforsmallareasdatasetfinancialyearending20181 2/Total annual income-Table 1.csv"), skip = 4) %>% 
  clean_names() %>% 
  select(local_authority_code, local_authority_name, total_annual_income) %>% 
  group_by(local_authority_code) %>% 
  mutate(mean_total_annual_income =  mean(total_annual_income)) %>% 
  select(-total_annual_income) %>% 
  unique()
```

```{r}
uk_ev_income <- uk_ev %>% 
  inner_join(income_england, by = c("ons_la_code_apr_2019" = "local_authority_code")) %>% 
  select(region_local_authority_apr_2019_3, x2021_q1, mean_total_annual_income) %>% 
  mutate(x2021_q1 = as.numeric(x2021_q1),
         x2021_q1_log = log(x2021_q1))
```

```{r}
uk_ev_income %>% 
  ggplot() +
  aes(x = x2021_q1_log, y = mean_total_annual_income) +
  geom_point() +
  geom_smooth(method = lm, colour = "#08a44c", se = FALSE) +
  theme_minimal() +
  labs(title = "\nEVs vs Mean Annual Income for Local Authorities in England and Wales\n",
       x = "\nNumber of Electric Vehicles (log)\n",
       y = "\nMean Total Annual Income\n") +
  png("ev_income.png", units="in", width=8, height=5, res=300)
```

